# Project: Serbia Survey
# Authors: William O'Brochta and Patrick Cunha Silva
# Goal: Appendix 
# First Version: April 9, 2021
# This Version: May 10, 2023
# Last Updaded by: Patrick

library(lmtest)
library(car)
library(sandwich)
library(here)
library(fastDummies)

rm(list = ls())

# To calculate differences across samples
zDif <- function(beta1, beta2, se1, se2){
  dif <- beta1 - beta2
  se <- sqrt(se1^2 + se2^2)
  zscores <- dif/se
  pvalues <- ifelse(zscores > 0, (1 - pnorm(zscores)) * 2, pnorm(zscores) * 2)
  out <- list(difference = dif, SE = se, zscore = zscores, pvalues = pvalues)
  return(out)
}

# Load Data
load(here('Data_Replication', 'genderedIdeologies.RData'))

# PCA analysis
pca.out <- prcomp(mydf[, c('better_world', 'most_places', 'influence')], scale. = TRUE)
summary(pca.out)
mydf$pca.1 <- pca.out$x[, 1] * -1 # So, larger values mean more nationalist

# Correlation between statements
round(cor(mydf[, c('Y', 'pca.1', 'better_world', 'most_places', 'influence')]), 2)

# Create a few variables
mydf$non_nat_man <- as.numeric(mydf$milena == 0 & mydf$slogan_cyrillic == 0)
mydf$nat_man <- as.numeric(mydf$milena == 0 & mydf$slogan_cyrillic == 1)
mydf$non_nat_woman <- as.numeric(mydf$milena == 1 & mydf$slogan_cyrillic == 0)
mydf$nat_woman <- as.numeric(mydf$milena == 1 & mydf$slogan_cyrillic == 1)

################ Create Women Respondent
mydf$women_resp <- as.numeric(mydf$gender == 'female')

################################################
####### SI E - Survey Descriptive Stats ########
################################################
# Sample characteristics
prop.table(table(mydf$cyrillic_start))
prop.table(table(mydf$gender))
prop.table(table(mydf$college))
prop.table(table(mydf$ageCat))
prop.table(table(mydf$married))
prop.table(table(mydf$unemployed))
prop.table(table(mydf$urban))
prop.table(table(mydf$serbian))

########################
## Descriptive Stats ###
########################

# Balance plot
mydf <- dummy_cols(mydf, select_columns = c('gender', 'ageCat', 'other_languages'))

covariates <- c(
  "slogan_cyrillic",
  "milena",
  "cyrillic_second",
  "cyrillic_start",
  "Y",
  "pca.1",
  "gender_female",
  "gender_male",
  'gender_no_response',
  "college",
  "ageCat_18-35",
  "ageCat_36-49",
  "ageCat_50-64",
  "ageCat_65+",
  "married",
  "unemployed",
  "urban",
  "other_languages_0",
  "other_languages_1",
  "other_languages_2",
  "other_languages_>2",
  "dif_lc_home",
  "dif_lc_work",
  'cyrillicImportance',
  "serbian",
  "sns",
  "polKnowQ1",
  "polKnowQ2",
  "intPolitics"
)
var.names <- c('Slogan Language =  Cyrillic', 
               'Politician Gender = Woman',
               'Survey Language = Cyrillic',
               'Ad Language = Cyrillic',
               "Perceived Nationalism",
               "Perceived Nationalism - PCA",
               "Gender = Woman",
               "Gender = Man", 
               "Gender = No Response", 
               "Education = College", 
               "Age = 18-35", 
               "Age = 36-49", 
               "Age = 50-64", 
               "Age = 65+", 
               "Marital status = Married", 
               "Unemployed", 
               "Urban", 
               "# of languages spoken = 0",
               "# of languages spoken = 1", 
               "# of languages spoken = 2", 
               "# of languages spoken > 2", 
               "Use of Latin-Cyrillic (at home)", 
               "Use of Latin-Cyrillic (at work)", 
               "Importance of Cyrillic",
               "Serbian", 
               "Party = SNS",
               "Knows the # of MPs", 
               "Knows the presidential term length", 
               "Interest in politics")

stargazer::stargazer(mydf[, covariates], summary = TRUE,
                     covariate.labels = var.names, type = 'text')

################################################
####### SI F - Randomization Checks ############
################################################


covariates <- c(
  "cyrillic_second",
  "cyrillic_start",
  "gender_female",
  "gender_male",
  'gender_no_response',
  "college",
  "ageCat_18-35",
  "ageCat_36-49",
  "ageCat_50-64",
  "ageCat_65+",
  "married",
  "unemployed",
  "urban",
  "other_languages_0",
  "other_languages_1",
  "other_languages_2",
  "other_languages_>2",
  "dif_lc_home",
  "dif_lc_work",
  'cyrillicImportance',
  "serbian",
  "sns",
  "polKnowQ1",
  "polKnowQ2",
  "intPolitics"
)
var.names <- c('Survey Language = Cyrillic',
               'Ad Language = Cyrillic',
               "Gender = Woman",
               "Gender = Man", 
               "Gender = No Response",                
               "Education = College", 
               "Age = 18-35", 
               "Age = 36-49", 
               "Age = 50-64", 
               "Age = 65+", 
               "Marital status = Married", 
               "Unemployed", 
               "Urban", 
               "# of languages spoken = 0",
               "# of languages spoken = 1", 
               "# of languages spoken = 2", 
               "# of languages spoken > 2", 
               "Use of Latin-Cyrillic (at home)", 
               "Use of Latin-Cyrillic (at work)", 
               "Importance of Cyrillic",
               "Serbian", 
               "Party = SNS",
               "Knows the # of MPs", 
               "Knows the presidential term length", 
               "Interest in politics")

# Gender
pvalues <- list()
for(i in 1:length(covariates)){pvalues[[i]] <- t.test(mydf[, covariates[i]] ~ mydf$milena)$p.value}
unlist(pvalues)
pdf(here("WomenPaper", "Plots", 'randomization_gender.pdf'), width = 12)
par(mar = c(6, 15, 1, 1))
plot(x = pvalues, y = 1:length(covariates), 
     xlim = c(0, 1), 
     ylim = c(1,length(covariates)),
     pch = 20, 
     xlab = 'p-value',
     ylab = '', 
     axes = FALSE, 
     cex = 1.75, 
     cex.lab = 1.5)
axis(1, at = seq(0, 1, by = 0.05))
axis(2, at = 1:length(covariates), labels = var.names, las = 1)
abline(v = 0.01, lty = 3)
abline(v = 0.05, lty = 3)
dev.off()

# Ad
pvalues <- list()
for(i in 1:length(covariates)){pvalues[[i]] <- t.test(mydf[, covariates[i]] ~ mydf$slogan_cyrillic)$p.value}
unlist(pvalues)
pdf(here("WomenPaper", "Plots", 'randomization_ad.pdf'), width = 12)
par(mar = c(6, 15, 1, 1))
plot(x = pvalues, y = 1:length(covariates), 
     xlim = c(0, 1), 
     ylim = c(1,length(covariates)),
     pch = 20, 
     xlab = 'p-value',
     ylab = '', 
     axes = FALSE, 
     cex = 1.75, 
     cex.lab = 1.5)
axis(1, at = seq(0, 1, by = 0.05))
axis(2, at = 1:length(covariates), labels = var.names, las = 1)
abline(v = 0.01, lty = 3)
abline(v = 0.05, lty = 3)
dev.off()

#####################################################
####### SI G - Additional Dependent Variable ########
#####################################################

#### Run Main Model
m1 <- lm(pca.1 ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = mydf) # No Controls
se1 <- sqrt(diag(vcovHC(m1, 'HC2')))
###Note: LH tests can be directly recovered from the following model:
m1_alt <- lm(pca.1 ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = mydf) # No Controls
se_alt1 <- sqrt(diag(vcovHC(m1_alt, 'HC2')))
coeftest(m1_alt, vcovHC(m1_alt, 'HC2'))

#### Run Model for Men Respondents
m2 <- lm(pca.1 ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'male')) # No Controls
se2 <- sqrt(diag(vcovHC(m2, 'HC2')))
###Note: LH tests can be directly recovered from the following model (for men subsample):
m2_alt <- lm(pca.1 ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'male')) # No Controls
se2_alt <- sqrt(diag(vcovHC(m2_alt, 'HC2')))
coeftest(m2_alt, vcovHC(m2_alt, 'HC2'))

#### Run Model for Women Respondents
m3 <- lm(pca.1 ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'female')) # No Controls
se3 <- sqrt(diag(vcovHC(m3, 'HC2')))
###Note: LH tests can be directly recovered from the following model (for women subsample):
m3_alt <- lm(pca.1 ~ nat_man + non_nat_man + non_nat_woman + cyrillic_second + cyrillic_start, data = subset(mydf, gender == 'female')) # No Controls
se3_alt <- sqrt(diag(vcovHC(m3_alt, 'HC2')))
coeftest(m3_alt, vcovHC(m3_alt, 'HC2'))

# Create Table
stargazer::stargazer(m1, m2, m3, 
                     se = list(se1, se2, se3),
                     covariate.labels = c('Slogan Alphabet = Cyrillic',
                                          "Politician's Gender = Woman",
                                          "Slogan Alphabet = Cyrillic x Politician's Gender = Woman",
                                          "Survey Alphabet = Cyrillic",
                                          'Survey Ad = Cyrillic'),
                     dep.var.labels = 'Perceived Nationalism',
                     column.labels = c('Full Sample', 'Men Respondents', 'Women Respondents'),
                     single.row = TRUE,
                     no.space = TRUE ,
                     omit.stat = c("ser", 'f'),                     
                     style = 'apsr',
                     type = 'text',
                     star.cutoffs = c(0.05, 0.01, 0.001),                    
                     title = 'Gender and Script on Citizens’ Perception of Politicians Regarding Nationalism---PCA First Component')


#################################
## SI H - Models With Controls ##
#################################

## With Controls, ad and survey + pre-treatment
m1 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start  +
           gender + college + ageCat + married + unemployed + urban +
           other_languages + dif_lc_home + dif_lc_work + cyrillicImportance +
           polKnowQ1 + polKnowQ2 + intPolitics, data = mydf) 
se1 <- sqrt(diag(vcovHC(m1, 'HC2')))

## With Controls, ad and survey + pre-treatment
m2 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start +
           college + ageCat + married + unemployed + urban +
           other_languages + dif_lc_home + dif_lc_work + cyrillicImportance +
           polKnowQ1 + polKnowQ2 + intPolitics, data = subset(mydf, gender == 'male')) 
se2 <- sqrt(diag(vcovHC(m2, 'HC2')))

m3 <- lm(Y ~ slogan_cyrillic * milena + cyrillic_second + cyrillic_start +
           college + ageCat + married + unemployed + urban +
           other_languages + dif_lc_home + dif_lc_work + cyrillicImportance +
           polKnowQ1 + polKnowQ2 + intPolitics, data = subset(mydf, gender == 'female')) 
se3 <- sqrt(diag(vcovHC(m3, 'HC2')))


stargazer::stargazer(m1, m2, m3, 
                     se = list(se1, se2, se3),
                     column.labels = c('Full Sample', 'Men Respondents', 'Women Respondents'),                     
                     covariate.labels = c('Slogan Alphabet = Cyrillic',
                                          "Politician's Gender = Woman",
                                          "Survey Alphabet = Cyrillic",
                                          'Survey Ad = Cyrillic',
                                          "Gender = Man",
                                          "Gender = No Response",
                                          "Education = College",
                                          "Age = 36-49",
                                          "Age = 50-64",
                                          "Age = 65+",
                                          "Marital status = Married",
                                          "Unemployed",
                                          "Urban",
                                          "# of languages spoken = 1",
                                          "# of languages spoken = 2",
                                          "# of languages spoken > 2",
                                          "Use of Latin-Cyrillic (at home)",
                                          "Use of Latin-Cyrillic (at work)",
                                          "Importance of Cyrillic",
                                          "Knows the # of MPs",
                                          "Knows the presidential term length",
                                          "Interest in politics",
                                          "Slogan Alphabet = Cyrillic x Politician's Gender = Woman"                                          
                     ),
                     dep.var.labels = 'Perceived Nationalism',
                     single.row = TRUE,
                     no.space = TRUE ,
                     omit.stat = c("ser", 'f'),                     
                     style = 'apsr',
                     type = 'text',
                     star.cutoffs = c(0.05, 0.01, 0.001),                     
                     title = 'Gender and Script on Citizens’ Perception of Politicians Regarding Nationalism --- With Controls'
                     )

